*** LIS Cross-National Data Center in Luxembourg

* email: usersupport@lisdatacenter.org 

*** LIS Self Teaching Package 2022
*** Part II: Gender, employment, and wages
*** Stata version

* last change of this version of the syntax: 15-01-2022.


** Exercise 7: Wage regressions

* Household-level variables pulled from each household file.
global varshh hid own
* Person-level variables pulled from each person file.
global varspp hid dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1
* Datasets to process, in the order in which they are stacked and reported.
global datasets us04 be04 gr04

program define make_data
    * Build the pooled working file exercise2_LIS in the mydata directory.
    * For every dataset listed in the global datasets: load the person file,
    * attach the household variables by hid, keep persons aged 25-54 with
    * relation <= 2200, and stack the result onto the datasets processed so far.
    *
    * The first pass is tracked with a flag rather than by comparing against a
    * fixed dataset name, so the list in the global datasets can be reordered
    * or changed without appending a stale or missing file.
    local first_pass = 1
    foreach ccyy in $datasets {
        use $varspp using $`ccyy'p, clear
        merge m:1 hid using $`ccyy'h, keepusing($varshh)
        keep if inrange(age,25,54) & relation<=2200
        if !`first_pass' {
            * Paths are quoted so that a mydata directory with spaces works.
            append using "${mydata}exercise2_LIS"
        }
        save "${mydata}exercise2_LIS", replace
        local first_pass = 0
    }
end
 
program define recode_data
    * Derive the analysis variables on the pooled file currently in memory:
    * homeowner and child-age indicators, an hourly wage that is top- and
    * bottom-coded per dataset, its logarithm, age squared and education dummies.
    * Reads the global datasets to know which dname values to process.

    recode own (100/199=1) (200/299=0), gen(homeowner)
    recode ageyoch (. 18/max = 0 "no children <18") (0/5 = 1 "<6 years") ///
        (6/17 = 2 "6-17   years"), gen(achildcat)
    label var achildcat "Lowest age of own children"

    * Negative hourly wages are set to zero before taking logs.
    gen hourwage = hwage1
    replace hourwage=0 if hwage1<0
    gen hourwage_log=log(hourwage)
    * Keep zero wages in the overall distribution of non-missing wages:
    * log(0) is missing, so those observations get a log wage of 0.
    replace hourwage_log=0 if hourwage_log==. & hourwage!=.

    * Create the helper variables once, as missing, and only fill them inside
    * the loop. This removes the dependence on a particular dataset being
    * present (or first) in the list of datasets.
    gen iqr=.
    gen upper_bound=.
    gen lower_bound=.

    foreach ccyy in $datasets {
        * Weighted quartiles of the log wage within this dataset.
        sum hourwage_log [aw=ppopwgt] if dname=="`ccyy'", de
        replace iqr=r(p75)-r(p25) if dname=="`ccyy'"
        * Bounds for extreme values: 3 times the IQR beyond the quartiles.
        replace upper_bound=r(p75) + (iqr * 3) if dname=="`ccyy'"
        replace lower_bound=r(p25) - (iqr * 3) if dname=="`ccyy'"
        * Top-code the hourly wage at the upper bound (bounds are in logs).
        replace hourwage=exp(upper_bound) if hourwage>exp(upper_bound) & !mi(hourwage) & dname=="`ccyy'"
        * Bottom-code the hourly wage at the lower bound.
        replace hourwage=exp(lower_bound) if hourwage<exp(lower_bound) & !mi(hourwage) & dname=="`ccyy'"
    }

    * Detach the value label from educ_c.
    label values educ_c .

    * Regression variables, built from the trimmed hourly wage.
    gen logwage = ln(hourwage)
    gen agesq=age^2
    recode achildcat (1=1) (0 2=0) (else=.), gen(youngchild)
    recode achildcat (2=1) (0 1=0) (else=.), gen(oldchild)
    recode educ (2=1) (1 3=0) (else=.), gen(mededuc)
    recode educ (3=1) (1 2=0) (else=.), gen(hieduc)
end

program define get_estimates
    * Run the log-wage regression separately for each value of sex (1 and 2)
    * and each dataset in the global datasets, then print one table per sex
    * with one column per dataset.
    * NOTE(review): presumably sex 1 = male and 2 = female - confirm against
    * the LIS codebook.
    forvalues sexcode = 1/2 {
        foreach ds in $datasets {
            quietly eststo: regress logwage age agesq mededuc hieduc immigr ///
                partner youngchild oldchild ptime1 homeowner [pw=ppopwgt] ///
                if sex==`sexcode' & dname=="`ds'", vce(robust)
        }
        * Print the stored models, then reset the store for the next group.
        esttab, b(a2) se(a2) r2(a3) mtitles($datasets)
        eststo clear
    }
end
 
* Driver: the steps below must run in this order.
* 1. Build the pooled file on disk (output suppressed).
quietly make_data 
* 2. Load the pooled file that make_data saved.
use ${mydata}exercise2_LIS, clear 
* 3. Create the derived regression variables (output suppressed).
quietly recode_data 
* 4. Estimate the wage regressions and print the result tables.
get_estimates
